#!/usr/bin/perl

# -----------------------------------------------------------------------------
# check_bandwidth 1.0.1
#    ~ Nagios(r) SNMP Network Traffic Monitor Plugin
#    ~ Copyright 2007, Jonathan Wright <jonathan (at) jabwebsolutions.co.uk>
#    ~ based on the check_traffic plugin by Adrian Weiczorek
#      and check_snmp_cisco_ifstatus by Altinity Limited
# -----------------------------------------------------------------------------
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307
# -----------------------------------------------------------------------------
# Note: Nagios is a registered trademark of Ethan Galstad. 

# setup Perl
use strict;
use diagnostics;
use warnings;

# import modules
use Net::SNMP;

# -----------------------------------------------------------------------------
# define the variables we're going to need and then fill them
our ($_storage, %_oid, %_status, %_options, $_cycle);
our ($session, $error, $message);

# directory used to cache the counters from the last SNMP poll (the script
#  creates one sub-directory per hostname and one file per interface)
$_storage  = '/tmp/.traffic';

# SNMP object identifiers queried by this plugin
%_oid = (
  'sysDesc'       => ".1.3.6.1.2.1.1.1.0",     # generic system details
  'ifNumber'      => ".1.3.6.1.2.1.2.1.0",     # number of interfaces
  # for the following, append interface number to obtain value
  'ifDescription' => '.1.3.6.1.2.1.2.2.1.2.',  # interface description
  'ifType'        => '.1.3.6.1.2.1.2.2.1.3.',  # interface type
  'ifSpeed'       => '.1.3.6.1.2.1.2.2.1.5.',  # interface bandwidth limit
  # NOTE: in IF-MIB column 7 is ifAdminStatus (the configured state) and
  #       column 8 is ifOperStatus (the current operational state); the key
  #       names are kept as-is because other parts of the script use them
  'ifConnected'   => '.1.3.6.1.2.1.2.2.1.7.',  # ifAdminStatus
  'ifEnabled'     => '.1.3.6.1.2.1.2.2.1.8.',  # ifOperStatus
  'ifReceived'    => '.1.3.6.1.2.1.2.2.1.10.', # interface bytes in
  'ifTransmitted' => '.1.3.6.1.2.1.2.2.1.16.'  # interface bytes out
);

# status codes to return upon various conditions (as understood by Nagios(r))
#  UNKNOWN must be 3: exiting with -1 becomes exit status 255, which is
#  outside the 0-3 range defined by the plugin API
%_status   = (
  'UNKNOWN'  => '3',
  'OK'       => '0',
  'WARNING'  => '1',
  'CRITICAL' => '2'
);

# command-line controlled options (including defaults)
%_options = (
  'community'  => 'public', 
  'version'    => '2c',
  'hostname'   => '',
  'interface'  => '',
  'override'   => 0,
  'warning'    => 75,
  'critical'   => 90,
  'timeout'    => 15,
  'on-the-fly' => 0,
  'check-up'   => 0,
  'pause'      => 10,
  'use-bytes'  => 0,
  'use-mega'  => 0
);

# calculate the point at which the counter will cycle (based on 32-bit counter):
# TODO: move this code to the bandwidth calculation section and work out how to
#       test whether the counter is 32-bit or 64-bit (and process accordingly).
$_cycle = 2**32;

# -----------------------------------------------------------------------------
# start the program by processing the command-line options
{
  # fetch the value that belongs to an option, reporting a usage error when
  #  the command line ends before the value has been supplied
  my $value_for = sub {
    my ($option) = @_;
    issue('No value given for '.$option) unless (@ARGV);
    return shift(@ARGV);
  };

  # test with defined() so that an argument of '0' or '' doesn't silently end
  #  the processing of the remaining arguments
  while (defined(my $arg = shift(@ARGV))) {
    # test against known arguments, and assign variables based on them
    if    ($arg =~ /^-(?:H|-hostname)$/)   { $_options{'hostname'}   = $value_for->($arg);     }
    elsif ($arg =~ /^-(?:C|-community)$/)  { $_options{'community'}  = $value_for->($arg);     }
    elsif ($arg =~ /^-(?:i|-interface)$/)  { $_options{'interface'}  = $value_for->($arg);     }
    elsif ($arg =~ /^-(?:o|-override)$/)   { $_options{'override'}   = $value_for->($arg);     }
    # limits are upper-cased so that suffixes can be given in either case
    elsif ($arg =~ /^-(?:w|-warning)$/)    { $_options{'warning'}    = uc($value_for->($arg)); }
    elsif ($arg =~ /^-(?:c|-critical)$/)   { $_options{'critical'}   = uc($value_for->($arg)); }
    elsif ($arg =~ /^-(?:t|-timeout)$/)    { $_options{'timeout'}    = $value_for->($arg);     }
    elsif ($arg =~ /^-(?:p|-pause)$/)      { $_options{'pause'}      = $value_for->($arg);     }
    # the remaining options are simple switches
    elsif ($arg =~ /^-(?:f|-on-the-fly)$/) { $_options{'on-the-fly'} = 1; }
    elsif ($arg =~ /^-(?:u|-check-up)$/)   { $_options{'check-up'}   = 1; }
    elsif ($arg =~ /^-(?:b|-use-bytes)$/)  { $_options{'use-bytes'}  = 1; }
    elsif ($arg =~ /^-(?:m|-use-mega)$/)   { $_options{'use-mega'}   = 1; }
    elsif ($arg =~ /^-(?:h|-help)$/)       { usage(1); }
    else {
      # if argument is unknown, then output error and exit
      print "Unknown option: $arg\n";
      usage(0);
    }
  }
}


# before we use the variables/options from the command line, we need to make
#  sure they are in the correct format and are usable first. The checks run
#  in a fixed order so that the same bad command line always reports the same
#  problem (iterating over the keys of the hash gives no guaranteed order)

# both of these must be supplied, as they have no usable default
foreach my $key ('hostname', 'interface') {
  issue('No value given for --'.$key)
    unless (defined($_options{$key}) && $_options{$key} ne '');
}

issue('Invalid hostname given ('.$_options{'hostname'}.')')
  unless (lc($_options{'hostname'}) =~ /^[a-z0-9_-]+(\.[a-z0-9_-]+)+$/);

# the interface is appended to the OIDs, compared numerically against the
#  number of interfaces and used as the cache file name, so only accept a
#  plain interface number
issue('Invalid value given for --interface')
  unless ($_options{'interface'} =~ /\A[0-9]+\z/);

# limits are either one value or a pair, each with an optional suffix
foreach my $key ('warning', 'critical') {
  issue('Invalid value given for --'.$key)
    unless (defined($_options{$key}) &&
            $_options{$key} =~ /^[0-9]+[KMGEP]?(,[0-9]+[KMGEP]?)?$/);
}

# the remaining values must be plain, non-negative integers
foreach my $key ('override', 'timeout', 'pause') {
  issue('Invalid value given for --'.$key)
    unless (defined($_options{$key}) && $_options{$key} =~ /^[0-9]+$/);
}

# begin by trying to establish a connection with the SNMP interface with the
#  device under the community given.
($session, $error) = Net::SNMP->session(
  -timeout   => $_options{'timeout'},
  -version   => $_options{'version'},
  -hostname  => $_options{'hostname'},
  -community => $_options{'community'}
);

# without a session object there is nothing to query (or to close), so report
#  the failure directly instead of calling a method on an undefined value
unless (defined($session)) {
  print 'CRITICAL Cannot create SNMP session ('.
    (defined($error) ? $error : 'unknown error').")\n";
  exit($_status{'CRITICAL'});
}

# if we cannot get the sysDesc OID, assume that the attempt has failed; the
#  reason for a failed request is held by the session object
critical(
  'SNMP agent not responding ('.$session->error().'): Check settings & try again'
) unless (defined($session->get_request(-varbindlist => [$_oid{'sysDesc'}])));

# register variables we're going to use in the next section
our($return, %count, %recount, $cache);

# find the interface on the system - append the interface number on the end
#  of the ifType, etc. OID's. first, find the number of interfaces and make
#  sure we're within the available limit
critical('Cannot get number of interfaces from SNMP agent')
  unless (defined(
    $return = $session->get_request(-varbindlist => [$_oid{'ifNumber'}])
  ));
critical(
  'Interface value invalid: '.$return->{$_oid{'ifNumber'}}.' available; '.
  'trying to use interface '.$_options{'interface'}
) if ($_options{'interface'} > $return->{$_oid{'ifNumber'}});

# run a check against the interface status if requested
check_up() if ($_options{'check-up'});

# unless on-the-fly requested, try to find and open the cache file for the
#  interface, retrieving the previous values
$cache = $_storage.'/'.$_options{'hostname'}.'/'.$_options{'interface'};

# a fresh first sample is needed when running on-the-fly, when no cache file
#  exists yet, or (decided below) when the cached values cannot be trusted
my $fresh_poll = ($_options{'on-the-fly'} || !(-e $cache));

unless ($fresh_poll) {
  # open the file and get the contents
  open(my $cache_in, '<', $cache) or
    critical('Cannot open interface cache file for reading');
  while (my $line = <$cache_in>) {
    # split each line on the first ':' only (the value itself may contain
    #  one) and add it into the %count hash
    chomp($line);
    my ($key, $value) = split(/:/, $line, 2);
    next unless (defined($key) && defined($value));
    $count{$key} = $value;
  }
  close($cache_in);

  # every value used in the calculations must be present and numeric
  my $usable = 1;
  foreach my $key ('ifTimeStamp', 'ifSpeed', 'ifReceived', 'ifTransmitted') {
    $usable = 0
      unless (defined($count{$key}) && $count{$key} =~ /^[0-9]+$/);
  }

  $usable = 0 unless (
    $usable &&
    # a speed of zero would cause a division by zero in the next test
    ($count{'ifSpeed'} > 0) &&
    # make sure that there is no chance that the counter has cycled around
    #  (i.e. the counter hasn't cycled based on the speed of the interface)
    #  TODO: this may be too short - look into coding comparison between
    #        this and a minimum period the data should be valid for (10m)
    ((time - $count{'ifTimeStamp'}) < ($_cycle/($count{'ifSpeed'}/8))) &&
    # make sure we have values for tx and rx
    (($count{'ifReceived'} > 0) && ($count{'ifTransmitted'} > 0))
  );

  # if any of these fail, ignore the values and fetch a fresh set instead
  $fresh_poll = 1 unless ($usable);
}

if ($fresh_poll) {
  # fetch the initial set of results from the device before sleeping for the
  #  allotted time to calculate the bandwidth value
  %count = get_count();
  sleep($_options{'pause'});
}

# fetch the next set of values from the device which will be calculated
#  against the original set to work out bandwidth
%recount = get_count();

# close the SNMP session now, as it's no longer needed
$session->close();

# save the data from %recount above back into the cache file, but first check
#  that the required directories exist so that we can write the file
unless ($_options{'on-the-fly'}) {
  my $host_dir = $_storage.'/'.$_options{'hostname'};

  # check (and make) the main root directory for the storage; the second -d
  #  test covers another check creating the directory at the same moment
  critical('Cannot create storage directory')
    unless (-d $_storage || mkdir($_storage) || -d $_storage);
  # check (and make) the hostname directory for the interface files
  critical('Cannot create storage directory for hostname')
    unless (-d $host_dir || mkdir($host_dir) || -d $host_dir);

  # save the data, one file per interface, one 'key:value' pair per line
  open(my $cache_out, '>', $cache) or
    critical('Cannot open interface cache file for writing');
  foreach my $key (sort(keys(%recount))) {
    print {$cache_out} $key.":".$recount{$key}."\n";
  }
  # buffered write errors only surface when the file is closed
  close($cache_out) or
    critical('Cannot write interface cache file');
}

# register variables we're going to use in the next section
our($tx, $rx, $report, $status);

# the number of seconds between the two samples; the timestamps only have a
#  resolution of one second, so two samples taken within the same second
#  (e.g. --pause 0, or a cache file written moments ago) give no usable
#  period and would otherwise cause a division by zero
my $elapsed = $recount{'ifTimeStamp'} - $count{'ifTimeStamp'};
unless ($elapsed > 0) {
  print "UNKNOWN No time has elapsed between samples: ".
    "increase --pause or check less frequently\n";
  exit($_status{'UNKNOWN'});
}

# calculate the bandwidth used (data transferred/time), but to prevent negative
#  transfers (and associated issues with limits), if the newer recount value is
#  less than the cached value, the counter has cycled. Therefore, add the
#  maximum possible value (at which the counter cycled) to the newer records
$recount{'ifReceived'} += $_cycle
  if ($recount{'ifReceived'} < $count{'ifReceived'});
$recount{'ifTransmitted'} += $_cycle
  if ($recount{'ifTransmitted'} < $count{'ifTransmitted'});

# calculate bandwidth used (tx/rx are in bytes, convert to bits as all
#  internal data and calculations should be done in bits)
$rx = (8*($recount{'ifReceived'} - $count{'ifReceived'})) / $elapsed;
$tx = (8*($recount{'ifTransmitted'} - $count{'ifTransmitted'})) / $elapsed;

# create the text report which we're going to send back to Nagios(r) (and can be
#  read by the admin via the site or via a notice)
$report = sprintf(
  # Nagios(r) Plugin Report: 'Status Information|Performance Data'
  'Out: %sps; In: %sps (Sent %s, Received %s in %s seconds)|Port %s (Current Sent: %s; Current Received: %s)',
  adjust($tx), adjust($rx),
  adjust($recount{'ifTransmitted'}-$count{'ifTransmitted'}),
  adjust($recount{'ifReceived'}-$count{'ifReceived'}),
  $elapsed,
  $recount{'ifDescription'},
  adjust($recount{'ifTransmitted'}),
  adjust($recount{'ifReceived'})
);

# test the values calculated against the warning and critical limits
#  given and therefore work out which status should be reported to Nagios(r).
#  First, register the variables we're going to be using
our (%levels, $tx_limit, $rx_limit);

# To minimise duplication of code, create a hash table with the keys as the
#  levels, pointing to the sub-routines which will report the status back
#  to Nagios(r) (and exit)
%levels = (
  'critical' => \&critical,
  'warning'  => \&warning
);

# process each level, and break down the limits into their tx/rx values so we
#  can test them against the measured bandwidth. The levels are listed
#  explicitly because the keys of a hash come back in no particular order:
#  CRITICAL has to be tested first, as it's higher than WARNING and must win
#  when both limits have been passed
foreach my $key ('critical', 'warning') {
  # check if we have two separate values for tx/rx limits at this level
  if ($_options{$key} =~ /^[0-9]+[KMGEP]?,[0-9]+[KMGEP]?$/) {
    # yep, so split
    ($tx_limit, $rx_limit) = split(',', $_options{$key});
  } else {
    # nope, so duplicate
    ($tx_limit, $rx_limit) = ($_options{$key}, $_options{$key});
  }

  # the interface speed is in bits/sec, but test() compares in bytes/sec when
  #  --use-bytes is given, so express the capacity in the same unit
  my $capacity = $recount{'ifSpeed'} / ($_options{'use-bytes'} ? 8 : 1);

  # if the values have no suffix (i.e. K, M or G), then they're going to be
  #  %age values - re-set the limit values to %age of available bandwidth
  $tx_limit = $capacity*($tx_limit/100)
    if ($tx_limit =~ /^[0-9]+$/);
  $rx_limit = $capacity*($rx_limit/100)
    if ($rx_limit =~ /^[0-9]+$/);

  # then run tests against each of the tx and rx values, triggering the
  #  sub-routine for this level if either of them has passed its limit
  $levels{$key}->($report)
    if (test($tx_limit, $tx) or test($rx_limit, $rx));
}

# if we've reached this stage, no errors have been triggered and so
#  it's safe to report that everything is OK. Return information and exit
#  with OK status value.
print 'OK '.$report."\n";
exit($_status{'OK'});

# -----------------------------------------------------------------------------
# report a configuration problem: print the supplied text (when there is any)
#  prefixed with 'ERR - ', then hand over to usage(0), which prints the short
#  usage summary and exits
sub issue {
  ($message) = @_;
  if ($message ne '') {
    print "ERR - $message\n";
  }
  usage(0);
}

# handle warning error messages: print the message prefixed with 'WARNING',
#  close the SNMP session (if there is one) and exit with the WARNING status.
#  Takes the message text as its only argument; does not return.
sub warning {
  my ($text) = @_;

  # set a default if no message was given (undefined or empty)
  $message = ((defined($text) && $text ne '') ?
              $text : 'No error message given');
  print "WARNING $message\n";

  # make sure any SNMP session has been closed - there may not be one if the
  #  failure happened before (or while) the session was being created
  $session->close() if (defined($session));
  exit($_status{'WARNING'});
}

# handle critical error messages: print the message prefixed with 'CRITICAL',
#  close the SNMP session (if there is one) and exit with the CRITICAL status.
#  Takes the message text as its only argument; does not return.
sub critical {
  my ($text) = @_;

  # set a default if no message was given (undefined or empty)
  $message = ((defined($text) && $text ne '') ?
              $text : 'No error message given');
  print "CRITICAL $message\n";

  # make sure any SNMP session has been closed - there may not be one if the
  #  failure happened before (or while) the session was being created
  $session->close() if (defined($session));
  exit($_status{'CRITICAL'});
}

# run the SNMP query to get the tx/rx data from the interface on the device.
#  Returns a hash with the keys ifTimeStamp (time of the poll), ifSpeed,
#  ifReceived, ifTransmitted and ifDescription. Exits via critical() when
#  the device gives no answer or the answer can't be used for calculations.
sub get_count {
  my $index = $_options{'interface'};

  # prepare the list of OIDs that we're going to send to the SNMP agent
  my (@list) = (
    $_oid{'ifSpeed'}.$index,       # interface speed
    $_oid{'ifReceived'}.$index,    # received byte counter
    $_oid{'ifTransmitted'}.$index, # transmitted byte counter
    $_oid{'ifDescription'}.$index  # interface name/description
  );

  # run the SNMP query, and throw a CRITICAL error message if we can't get
  #  any data back from the device
  critical('Values unavailable for interface '.$index)
    unless (defined($return = $session->get_request(-varbindlist=>[@list])));

  # if the speed has been overridden at the command line, ignore the value
  #  from the SNMP data returned by the device
  my $speed = ($_options{'override'} ?
               $_options{'override'} : $return->{$list[0]});

  # the speed and both counters are used in arithmetic and written to the
  #  cache, so refuse anything that is missing or isn't a plain number (as
  #  may happen when the interface number doesn't exist on the device)
  foreach my $number ($speed, $return->{$list[1]}, $return->{$list[2]}) {
    critical('Values unavailable for interface '.$index)
      unless (defined($number) && $number =~ /^[0-9]+$/);
  }

  # create and return a hash table with all the values returned
  my %values = (
    # the time of the data needs to be recorded along with the data itself
    'ifTimeStamp'   => time,
    'ifSpeed'       => $speed,
    # use the remaining data as normal
    'ifReceived'    => $return->{$list[1]},
    'ifTransmitted' => $return->{$list[2]},
    'ifDescription' => $return->{$list[3]}
  );
  return %values;
}

# run an SNMP query to check that the network interface is connected and
#  enabled. Returns normally when both status values report 'up'; otherwise
#  exits via critical().
sub check_up {
  # prepare the list of OIDs that we're going to send to the SNMP agent
  my (@list) = (
    $_oid{'ifConnected'}.$_options{'interface'},   # interface connected
    $_oid{'ifEnabled'}.$_options{'interface'},     # interface enabled
  );

  # run the SNMP query, and throw a CRITICAL error message if we can't get
  #  any data back from the device
  critical('Values unavailable for interface '.$_options{'interface'})
    unless (defined($return = $session->get_request(-varbindlist=>[@list])));

  # both values are enumerations in which only 1 means 'up' (2 is 'down'),
  #  so they must be compared against 1 - testing them for truth would
  #  treat every state, including down, as up
  my @is_up = map {
    (defined($return->{$_}) && $return->{$_} =~ /^[0-9]+$/ &&
     $return->{$_} == 1) ? 1 : 0
  } @list;

  # this time throw a CRITICAL error message unless both values are up
  #  (i.e. the interface is not connected and/or not enabled)
  critical(
    'Interface '.$_options{'interface'}.' not enabled ('.
    ($is_up[0] ? 'Up' : 'Down').', '.
    ($is_up[1] ? 'Up' : 'Down').')'
  ) unless ($is_up[0] && $is_up[1]);
}

# take a bits value and convert it into human-readable format for output
#  to Nagios(r) (doesn't change the original data used for calculations).
#  Takes the value in bits; returns a string with two decimal places, a
#  size suffix (none for small values) and 'b' for bits or 'B' for bytes.
sub adjust {
  # the variables are private to each call: the suffix in particular must
  #  start out empty, otherwise a small value would be reported with the
  #  suffix left behind by an earlier, larger value
  my ($value) = @_;
  my $ext     = '';

  # convert the value into bytes if requested by the --use-bytes
  #  command-line argument
  $value = $value/8 if ($_options{'use-bytes'});

  # if the --use-mega command-line option has been supplied, force all
  #  conversion to multiples of megabyte or megabit.
  if ($_options{'use-mega'}) {
    $value = ($value/(1024*1024));
    $ext   = 'M';

  # otherwise keep dividing by 1024 while we still have suffixes available,
  #  until we have a human-readable number (i.e. between 1 and 1024)
  } else {
    # suffixes in increasing order: kilo, mega, giga, tera, peta, exa
    my (@exts) = qw(K M G T P E);
    while ($value > 1024 && scalar @exts > 0) {
      $value = ($value/1024);
      $ext = shift @exts;
    }
  }

  # return the value, formatting to 2 decimal places and correct terminology
  #  for bits and bytes
  return sprintf('%0.2f%s', $value, $ext.($_options{'use-bytes'} ? 'B' : 'b'));
}

# take the limit and the value and compare to see if we've passed it.
#  The limit is either a plain number or a number followed by a size suffix;
#  the value is given in bits. Returns true only if the value is greater
#  than the limit.
sub test {
  my ($limit, $value) = @_;

  # convert the value into bytes if requested by the --use-bytes
  #  command-line argument, so a suffixed limit is then a limit in bytes
  $value = $value/8 if ($_options{'use-bytes'});

  # the power of 1024 that each suffix stands for (kilo, mega, giga, tera,
  #  peta, exa)
  my %power_of = (
    'K' => 1, 'M' => 2, 'G' => 3, 'T' => 4, 'P' => 5, 'E' => 6
  );

  # if the limit has a suffix, it's not yet an absolute value which can be
  #  compared - convert it based on the suffix
  if ($limit =~ /^([0-9]+)([KMGTPE])$/) {
    $limit = $1 * (1024 ** $power_of{$2});
  }

  # test the value against the limit - only return true if the value has
  #  passed the limit
  return ($value > $limit);
}

# -----------------------------------------------------------------------------
# program usage guidelines: print the usage summary and exit with the UNKNOWN
#  status. When called with 1 the full help (about text and the description
#  of every option) is printed; with any other value only the short summary
#  and a pointer to --help are shown. Does not return.
sub usage {
  # get and test to see if we need to display the full help output
  my ($mode) = @_;
  my $full   = ((defined($mode) && $mode == 1) ? 1 : 0);

  # output about and usage (depending on $full above)
  print "check_bandwidth v1.0.1\n Copyright 2007 Jonathan Wright <jonathan\@jabwebsolutions.co.uk>\n\n".
    "Poll (via SNMP) a network port and calculate bandwidth usage\n\n"
    if $full;
  print "Usage: check_bandwidth --hostname hostname --interface number\n".
    "         [--community name] [--timeout seconds] [--pause seconds]\n".
    "         [--override bits/sec] [--warning value] [--critical value]\n".
    "         [--check-up] [--on-the-fly] [--use-bytes] [--use-mega]\n";
  print "       see --help for further information\n"
    unless $full;
  # the defaults shown are read from %_options
  print "\nOptions:\n".
    " -h, --help\n".
    "    Display this help message\n".
    " -H, --hostname STRING\n".
    "    IP address or hostname of (remote) device to query\n".
    " -i, --interface INTEGER\n".
    "    Number of the interface to be queried\n".
    " -C, --community STRING            (default ".$_options{'community'}.")\n".
    "    SNMP community name to use for polling\n".
    " -t, --timeout INTEGER             (default ".$_options{'timeout'}."s)\n".
    "    Set the timeout value for communications with host via SNMP\n".
    " -p, --pause INTEGER               (default ".$_options{'pause'}."s)\n".
    "    Set the length of the pause when calculating results on-the-fly\n".
    "    (or between first checks when no cache value exists)\n".
    " -o, --override INTEGER            (default ".($_options{'override'}?$_options{'override'}:'Off').")\n".
    "    Override the maximum throughput available on selected port\n".
    " -f, --on-the-fly                  (default ".($_options{'on-the-fly'}?'On':'Off').")\n".
    "    Perform all calculation of bandwidth on-the-fly and don't use store\n".
    " -u, --check-up                    (default ".($_options{'check-up'}?'On':'Off').")\n".
    "    Check that the interface is connected and enabled before polling\n".
    " -w, --warning INTEGER[,INTEGER]   (default ".$_options{'warning'}.")\n".
    "    Value at which to trigger WARNING. Default is %age of available bandwidth\n".
    "    Single value is for both TX & RX - to specify different limits, use TX,RX\n".
    "    (if K/M/G appended, treated as absolute value in bits, or in bytes\n".
    "    with --use-bytes)\n".
    " -c, --critical INTEGER[,INTEGER]  (default ".$_options{'critical'}.")\n".
    "    Value at which to trigger CRITICAL. Same semantics as --warning above\n".
    " -b, --use-bytes                   (default ".($_options{'use-bytes'}?'On':'Off').")\n".
    "    Use bytes instead of bits in all calculations (i.e. Megabytes not Megabits)\n".
    " -m, --use-mega                    (default ".($_options{'use-mega'}?'On':'Off').")\n".
    "    Force use of Megabit/Megabyte in all output (don't use Kilo or Giga)\n".
    "\n"
    if $full;

  exit($_status{'UNKNOWN'});
}
